import os
import pandas as pd

# Root folder that holds one subfolder per data source.
base_folder = "D:\\mydata\\生态项目相关\\数据集\\好快"

# Subfolders to scan; their rows are concatenated in this order.
subfolders = ["landset8", "landset5", "landset7"]
# Name of the CSV expected inside every subfolder.
filename = "summary_statistics_filled_nan.csv"


def load_source_frames(root, names, csv_name):
    """Read ``root/<name>/csv_name`` for every name and tag rows with their origin.

    Args:
        root: Folder that contains the per-source subfolders.
        names: Subfolder names to visit, in order.
        csv_name: File name to read inside each subfolder.

    Returns:
        A list of DataFrames, one per file that was read successfully, each
        with an added ``source`` column holding the subfolder name. Missing
        or unreadable files are reported on stdout and skipped.
    """
    frames = []
    for name in names:
        csv_path = os.path.join(root, name, csv_name)
        # Read first and react to the failure instead of checking existence
        # beforehand: avoids the check-then-open race.
        try:
            frame = pd.read_csv(csv_path)
        except (FileNotFoundError, NotADirectoryError):
            print(f"文件不存在: {csv_path}")
        except Exception as e:
            # Best-effort merge: one broken file must not abort the others.
            print(f"读取 {csv_path} 时出错: {e}")
        else:
            frame['source'] = name  # remember which subfolder the rows came from
            frames.append(frame)
            print(f"已读取文件: {csv_path}")
    return frames


def combine_by_date(frames, date_col="date"):
    """Concatenate ``frames`` and return the rows ordered by ``date_col``.

    Args:
        frames: Non-empty list of DataFrames to stack.
        date_col: Name of the column to sort on; converted with
            ``pd.to_datetime`` when it is not already a datetime dtype.

    Returns:
        A new DataFrame sorted by ``date_col``. Rows sharing the same date
        keep the order in which they appear in ``frames``.

    Raises:
        KeyError: If none of the frames provides ``date_col``.
    """
    combined = pd.concat(frames, ignore_index=True)

    if date_col not in combined.columns:
        raise KeyError(f"合并后的数据缺少 '{date_col}' 列，无法按日期排序")

    if not pd.api.types.is_datetime64_any_dtype(combined[date_col]):
        combined[date_col] = pd.to_datetime(combined[date_col])

    # mergesort is stable, so ties on the date keep a deterministic
    # (input) order instead of whatever the default quicksort yields.
    return combined.sort_values(by=date_col, kind="mergesort")


# Collect the per-source tables.
all_dfs = load_source_frames(base_folder, subfolders, filename)

if all_dfs:
    combined_df = combine_by_date(all_dfs)

    # Write the merged table next to the source subfolders.
    output_path = os.path.join(base_folder, "578whole_nan.csv")
    combined_df.to_csv(output_path, index=False)
    print(f"\n合并完成，结果保存至: {output_path}")
else:
    # Report the file name that was actually searched for.
    print(f"未能找到任何 {filename} 文件。")
